1. Load Packages

source("./Mean Reversion/RMR.001 Load Packages.R") 

2. Load Data

# Read the raw pricing table. The compact column spec declares eleven columns:
# integer, date-time, seven doubles, character, integer.
pricing_data <- read_csv(
  file = "./Mean Reversion/Raw Data/pricing data.csv",
  col_types = "iTdddddddci"
)

3. Prepare Data Function

Description
Spreads Poloniex pricing data into wide format (one column per currency pair) and filters the data to a specified time resolution and time window.

Arguments
pricing_data: A dataframe containing pricing data from Poloniex gathered in tidy format.
time_resolution: The number of seconds that each observation spans. Takes values 300, 900, 1800, 7200, 14400, and 86400.
start_date: The start date of the time window.
end_date: The end date of the time window.

Value
Returns a dataframe consisting of the unix timestamp, date time, and the closing price of each currency pair.

# Filter tidy pricing data to one time resolution and time window, then
# reshape it to wide format with one closing-price column per currency pair.
#
# Arguments:
#   pricing_data:    data frame with (at least) the columns period, date_time,
#                    date_unix, close and currency_pair.
#   time_resolution: value matched against the `period` column.
#   start_date:      inclusive lower bound on date_time.
#   end_date:        inclusive upper bound on date_time.
#
# Returns a data frame with date_unix, date_time and one column of closing
# prices per currency pair.
#
# NOTE(review): both bounds are inclusive, so a train window ending at a cutoff
# and a test window starting at the same cutoff share any row stamped exactly
# at the cutoff. Callers in this file pass Date bounds; confirm the comparison
# against date_time behaves as intended for that column's type.
prepare_data <- function(pricing_data, time_resolution, start_date, end_date) {
  in_window <- filter(pricing_data,
                      period == time_resolution,
                      date_time >= start_date,
                      date_time <= end_date)
  closes <- select(in_window, date_unix, date_time, close, currency_pair)
  spread(closes, currency_pair, close)
}

4. Test Cointegration Function

Description
The Engle-Granger method is used to test for cointegration. This method is comprised of two steps: (1) Perform a linear regression of log(coin_y) on log(coin_x). (2) Perform an Augmented Dickey-Fuller test on the residuals from the linear regression estimated in (1). The ADF test specification is of a non-zero mean, no time-based trend, and one autoregressive lag.

Arguments
coin_y: A vector containing the pricing data for the dependent coin in the regression.
coin_x: A vector containing the pricing data for the independent coin in the regression.

Value
Returns the ADF test statistic for the given coin pair.

# Engle-Granger style cointegration test for one coin pair.
#
# Step 1 regresses log(coin_y) on an intercept and log(coin_x); step 2 runs
# ur.df() (type = "drift", one lag) on the step-1 residuals.
#
# Arguments:
#   coin_y: numeric vector of prices for the dependent coin.
#   coin_x: numeric vector of prices for the independent coin.
#
# Returns a single number: row 2, column 3 of the coefficient table of the
# ur.df() test regression.
# NOTE(review): presumably that cell is the t value on the lagged level, i.e.
# the ADF tau statistic - confirm against the urca documentation.
test_cointegration <- function(coin_y, coin_x) {
  design <- cbind(1, log(coin_x))
  first_stage <- lm.fit(x = design, y = log(coin_y))
  adf_fit <- ur.df(first_stage[["residuals"]], type = "drift", lags = 1)
  adf_fit@testreg[["coefficients"]][2, 3]
}

5. Create Coin Pairs Function

Description
Two sets of currency pairs are examined: currency pairs where USDT is the quote currency and currency pairs where BTC is the quote currency. All combinations of coins are created within a given quote currency. Combinations that consist of the coin with itself are removed.

Arguments
quote_currency: A string indicating the quote currency of the currency pairs. Can take values USDT or BTC.

Value
Returns a dataframe containing the coin pairs.

# Build every ordered (coin_y, coin_x) combination of the currency pairs that
# share a quote currency, excluding a coin paired with itself.
#
# Arguments:
#   quote_currency: a single string, either "USDT" or "BTC".
#
# Returns a tibble with character columns coin_y and coin_x.
#
# Raises an error naming the offending value when quote_currency is not one of
# the supported quote currencies (previously this surfaced as an unrelated
# "object 'coin_list' not found" error).
create_pairs <- function(quote_currency) {
  # as.character() keeps factor input working, as it did with the `==` test.
  quote_currency <- as.character(quote_currency)
  if (length(quote_currency) != 1L || is.na(quote_currency)) {
    stop("`quote_currency` must be a single string: \"USDT\" or \"BTC\".",
         call. = FALSE)
  }
  coin_list <- switch(
    quote_currency,
    USDT = c("USDT_BTC", "USDT_DASH", "USDT_ETH", "USDT_LTC", "USDT_REP", "USDT_XMR", "USDT_ZEC"),
    BTC = c("BTC_DASH", "BTC_ETH", "BTC_LTC", "BTC_REP", "BTC_XEM", "BTC_XMR", "BTC_ZEC"),
    stop("`quote_currency` must be \"USDT\" or \"BTC\", not \"",
         quote_currency, "\".", call. = FALSE)
  )
  # stringsAsFactors = FALSE yields character columns directly, so no
  # factor-to-character conversion is needed afterwards.
  coin_pairs <- expand.grid(coin_y = coin_list,
                            coin_x = coin_list,
                            stringsAsFactors = FALSE) %>%
    filter(coin_y != coin_x) %>%
    as_tibble()
  coin_pairs
}

6. Test Coin Pairs Function

Description
Test for cointegration between each coin pair generated by the create_pairs() function. The test for cointegration is performed by test_cointegration().

Arguments
train: A dataframe generated by prepare_data() that represents the training set for the coin pairs.
coin_pairs: A dataframe generated by create_pairs().

Value
Returns a dataframe containing the coin pairs and the ADF test statistic resulting from testing cointegration between each coin pair.

# Run test_cointegration() on every coin pair and rank the pairs by the
# resulting statistic.
#
# Arguments:
#   train:      data frame with one price column per currency pair; the columns
#               named in coin_pairs are looked up with [[ ]].
#   coin_pairs: data frame with columns coin_y and coin_x holding column names
#               of `train`.
#
# Returns coin_pairs with an added numeric column adf_stat, sorted ascending
# (most negative statistic first). An empty coin_pairs yields an empty result
# instead of an out-of-bounds error.
test_pairs <- function(train, coin_pairs) {
  # seq_len() is empty for zero rows, whereas 1:nrow() would iterate c(1, 0).
  adf_stat <- vapply(
    seq_len(nrow(coin_pairs)),
    function(n) {
      coin_y <- coin_pairs[[n, "coin_y"]]
      coin_x <- coin_pairs[[n, "coin_x"]]
      test_cointegration(coin_y = train[[coin_y]], coin_x = train[[coin_x]])
    },
    numeric(1)
  )
  coin_pairs %>%
    mutate(adf_stat = adf_stat) %>%
    arrange(adf_stat)
}

7. Select Coin Pairs Function

Description
Select cointegrated coin pairs to be used in a mean reversion strategy. Coin pairs are tested for cointegration using test_pairs(). Coin pairs with an ADF test statistic below a certain threshold are selected.

Arguments
train: A dataframe generated by prepare_data() that represents the training set for the coin pair.
coin_pairs: A dataframe generated by create_pairs().

Value
Returns a dataframe containing the coin pairs that were selected.

# Keep the coin pairs whose cointegration statistic from test_pairs() is at or
# below a threshold.
#
# Arguments:
#   train:         data frame with one price column per currency pair.
#   coin_pairs:    data frame with columns coin_y and coin_x.
#   adf_threshold: pairs with adf_stat <= adf_threshold are kept. Defaults to
#                  -3.43, the cutoff this function has always used.
#                  NOTE(review): -3.43 looks like the 1% critical value of the
#                  plain ADF drift test; residual-based Engle-Granger tests
#                  conventionally use stricter critical values - confirm.
#
# Returns the filtered data frame (coin_y, coin_x, adf_stat), still sorted by
# adf_stat.
select_pairs <- function(train, coin_pairs, adf_threshold = -3.43) {
  # Copy to a distinctly named local so a same-named column in the tested
  # table cannot shadow the argument inside filter().
  cutoff_value <- adf_threshold
  tested <- test_pairs(train = train, coin_pairs = coin_pairs)
  filter(tested, adf_stat <= cutoff_value)
}

8. Train Model Function

Description
Performs rolling linear regression of log(coin y) on log(coin x) using a defined rolling window length over the test set.

Arguments
train: A dataframe generated by prepare_data() that represents the training set for the coin pair.
test: A dataframe generated by prepare_data() that represents the test set for the coin pair.
coin_y: A string indicating the dependent coin in the coin pair regression.
coin_x: A string indicating the independent coin in the coin pair regression.
rolling_window: The number of observations used in each iteration of a rolling linear regression.

Value
Returns a list containing the intercept and hedge ratio calculated from a rolling regression over the test set as well as the spread and z-score of the spread.

# Fit a rolling log-log regression for one coin pair and derive the test-set
# spread and its rolling z-score.
#
# Arguments:
#   train, test:    data frames with one price column per currency pair; the
#                   columns named by coin_y and coin_x are read with [[ ]].
#   coin_y, coin_x: column names of the dependent / independent coin.
#   rolling_window: window width (in rows) passed to rollapply(), roll_mean()
#                   and roll_sd().
#
# Returns a list with four vectors, one element per test row: intercept,
# hedge_ratio, spread and spread_z.
train_model <- function(train, test, coin_y, coin_x, rolling_window) { 
  
  # Rolling regression of log(coin_y) on log(coin_x) over train and test stacked
  # together, so the first test rows have a window that reaches back into train.
  rolling_coef <- bind_rows(train, test) %>%  
    mutate(y = log(.[[coin_y]]), 
           x = log(.[[coin_x]])) %>% 
    select(y, x) %>% 
    # by.column = FALSE hands each window (both columns) to FUN as one block;
    # align = "right" ends each window at the current row and fill = NA pads
    # the rows that do not yet have a full window.
    rollapply(data = ., 
              width = rolling_window, 
              FUN = function(df) { 
                df <- as_tibble(df)
                model <- lm.fit(y = df[["y"]], x = cbind(1, (df[["x"]])))
                return(model[["coefficients"]])}, 
              by.column = FALSE, 
              fill = NA, 
              align = "right") %>% 
    as_tibble() %>% 
    # The rename relies on the coefficient columns being called x1 (the
    # constant column) and x2 (the log(coin_x) column).
    rename(intercept = x1, 
           hedge_ratio = x2) %>% 
    # Drop the rows that belong to the training set; only test rows remain.
    filter(row_number() > nrow(train)) 
  
  # Train spread: residuals of one static regression over the whole train set.
  # Test spread: log(y) - hedge_ratio * log(x) - intercept, using the rolling
  # coefficients of the same row.
  train <- train %>% 
    mutate(spread = lm.fit(y = log(train[[coin_y]]), x = cbind(1, log(train[[coin_x]])))[["residuals"]]) 
  test <- test %>% 
    mutate(spread = log(test[[coin_y]]) - log(test[[coin_x]]) * rolling_coef[["hedge_ratio"]] - rolling_coef[["intercept"]]) 
  
  # Stack train and test so the rolling mean / sd of the spread for early test
  # rows can draw on train-set spreads, then keep only the test rows.
  # NOTE(review): roll_mean() / roll_sd() are presumably from RcppRoll - confirm
  # against the package-loading script.
  result <- bind_rows(train %>% mutate(source = "train"), 
                      test %>% mutate(source = "test")) %>% 
    mutate(rolling_mean = roll_mean(spread, n = rolling_window, fill = NA, align = "right"), 
           rolling_sd = roll_sd(spread, n = rolling_window, fill = NA, align = "right"), 
           spread_z = (spread - rolling_mean) / rolling_sd) %>% 
    filter(source == "test") 
  
  # Return the per-row statistics for the test set as a named list.
  return(list(intercept = rolling_coef[["intercept"]], 
              hedge_ratio = rolling_coef[["hedge_ratio"]], 
              spread = result[["spread"]], 
              spread_z = result[["spread_z"]]))
} 

9. Generate Signals Function

Description
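Generate trading signals that indicate the current position in the spread formed by a trained model generated by
train_model(). The trained model generates the spread and z-score of the spread over the test set. The signal is the fraction of the full position to hold, in steps of 0.25 between -1 and +1: a positive signal indicates a long position in the spread, 0 indicates a flat position, and a negative signal indicates a short position in the spread. The signal is based on the previous observation's z-score, and is forced to 0 for the rest of the test set once the z-score has reached -6 or +6.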

Arguments
train: A dataframe generated by prepare_data() that represents the training set for the coin pair.
test: A dataframe generated by prepare_data() that represents the test set for the coin pair.
coin_y: A string indicating the dependent coin in the coin pair regression.
coin_x: A string indicating the independent coin in the coin pair regression.
model: A trained model generated by train_model().

Value
Returns a vector containing the trading signal over the test set.

# Turn the z-score of the spread into a position signal for every test row.
#
# The signal at a row is driven by the previous row's z-score (the first row
# uses 0). Its size grows with the distance of that z-score from zero and its
# sign opposes the z-score:
#   |z| in [0, 1) -> 0.25,  [1, 2) -> 0.50,  [2, 3) -> 0.75,
#   |z| in [3, 6) -> 1.00,  |z| >= 6 -> 0,   z == 0 -> 0.
# Once the lagged z-score has reached -6 or +6, every later signal is 0.
#
# Arguments:
#   train, coin_y, coin_x: not used by the body; kept for the call signature.
#   test:  data frame for the test set (supplies the row count).
#   model: list with elements spread and spread_z, as returned by train_model().
#
# Returns a numeric vector with one signal per test row.
# NOTE(review): a missing lagged z-score gives signal 0 at that row, but it
# also makes cummin() / cummax() NA from that row onward, so the stop-out
# steps return NA for every later row.
generate_signals <- function(train, test, coin_y, coin_x, model) {
  # Lower edges of the |z| tiers and the position size attached to each tier.
  tier_breaks <- c(0, 1, 2, 3, 6)
  tier_weights <- c(0.25, 0.50, 0.75, 1.00, 0.00)
  signal_table <- test %>%
    mutate(spread = model[["spread"]],
           spread_z = model[["spread_z"]],
           lag_spread_z = lag(spread_z, 1, default = 0),
           # findInterval() uses left-closed tiers: [0, 1), [1, 2), ...
           tier = findInterval(abs(lag_spread_z), tier_breaks),
           # Long the spread when z is negative, short when it is positive.
           signal = -sign(lag_spread_z) * tier_weights[tier],
           signal = if_else(is.na(signal), 0, signal),
           # Stop-out: stay flat once the z-score has ever hit -6 or +6.
           signal = if_else(cummin(lag_spread_z) <= -6.0, 0, signal),
           signal = if_else(cummax(lag_spread_z) >= 6.0, 0, signal))
  signal_table[["signal"]]
}

10. Backtest Pair Function

Description
Calculate the return of a cointegration-based mean reversion trading strategy using coin y and coin x.

The current backtesting logic uses a model generated by train_model() and trading signals generated by generate_signals(). The coin_y_return and coin_x_return indicate the one period percentage return of each coin. The coin_y_position and coin_x_position indicate the market value in USD in each coin. coin_y_pnl and coin_x_pnl indicate the USD value of the profit and loss for each coin. The combined_position indicates the gross market value of the combined positions. The return is calculated relative to the maximum capital allocation to the given coin pair.

Arguments
train: A dataframe generated by prepare_data() that represents the training set for the coin pair.
test: A dataframe generated by prepare_data() that represents the test set for the coin pair.
coin_y: A string indicating the dependent coin in the coin pair regression.
coin_x: A string indicating the independent coin in the coin pair regression.
rolling_window: The number of observations used in each iteration of a rolling linear regression.

Value
Returns a vector containing the cumulative return of applying the trading strategy to the given coin pair.

# Backtest the mean reversion strategy on one coin pair.
#
# Positions are expressed per unit of signal: coin y holds `signal`, coin x
# holds -signal * hedge_ratio. Each period's P&L is the previous row's position
# times the current one-period return, and the combined P&L is scaled by
# (1 + |hedge_ratio|) to give the period return.
#
# Arguments:
#   train, test:    data frames with one price column per currency pair.
#   coin_y, coin_x: column names of the dependent / independent coin.
#   rolling_window: window width forwarded to train_model().
#
# Returns a numeric vector with the cumulative return (starting from 1) for
# every test row.
backtest_pair <- function(train, test, coin_y, coin_x, rolling_window) {
  pair_model <- train_model(train = train,
                            test = test,
                            coin_y = coin_y,
                            coin_x = coin_x,
                            rolling_window = rolling_window)
  pair_signal <- generate_signals(train = train,
                                  test = test,
                                  coin_y = coin_y,
                                  coin_x = coin_x,
                                  model = pair_model)
  ledger <- test %>%
    mutate(signal = pair_signal,
           coin_y_return = test[[coin_y]] / lag(test[[coin_y]], 1) - 1,
           coin_x_return = test[[coin_x]] / lag(test[[coin_x]], 1) - 1,
           coin_y_position = signal,
           coin_x_position = -1 * signal * pair_model[["hedge_ratio"]],
           coin_y_pnl = lag(coin_y_position, 1) * coin_y_return,
           coin_x_pnl = lag(coin_x_position, 1) * coin_x_return,
           combined_position = abs(coin_y_position) + abs(coin_x_position),
           combined_pnl = coin_y_pnl + coin_x_pnl,
           combined_return = combined_pnl / (1 + abs(pair_model[["hedge_ratio"]]))) %>%
    # Replace every NA (e.g. the first row's lagged values) with 0 so that the
    # cumulative product below is defined for all rows.
    mutate_all(funs(ifelse(is.na(.), 0, .))) %>%
    mutate(return_pair = cumprod(1 + combined_return))
  ledger[["return_pair"]]
}

11. Backtest Strategy Function

Description
Calculate the return of a cointegration-based mean reversion trading strategy using an equally weighted portfolio of cointegrated coin pairs. The cumulative return of each coin pair is calculated using backtest_pair() and the mean across pairs is taken at each timestamp.

Arguments
train: A dataframe generated by prepare_data() that represents the training set for the coin pair.
test: A dataframe generated by prepare_data() that represents the test set for the coin pair.
selected_pairs: A dataframe generated by select_pairs() that represents a set of cointegrated coin pairs.
rolling_window: The number of observations used in each iteration of a rolling linear regression.

Value
A vector containing the cumulative return of the overall trading strategy.

# Backtest an equally weighted portfolio of coin pairs.
#
# Each pair is backtested with backtest_pair(); the strategy's cumulative
# return at a timestamp is the mean of the pairs' cumulative returns there.
#
# Arguments:
#   train, test:    data frames with one price column per currency pair; test
#                   must also carry a date_time column.
#   selected_pairs: data frame with columns coin_y and coin_x.
#   rolling_window: window width forwarded to backtest_pair().
#
# Returns a numeric vector with one value per distinct date_time, or the
# scalar 1 when selected_pairs has no rows.
backtest_strategy <- function(train, test, selected_pairs, rolling_window) {
  pair_count <- nrow(selected_pairs)
  if (pair_count == 0) {
    return(1)
  }
  # One tibble of cumulative returns per pair, stacked once at the end.
  per_pair <- lapply(seq_len(pair_count), function(k) {
    y_name <- selected_pairs[["coin_y"]][k]
    x_name <- selected_pairs[["coin_x"]][k]
    tibble(
      return_pair = backtest_pair(train = train,
                                  test = test,
                                  coin_y = y_name,
                                  coin_x = x_name,
                                  rolling_window = rolling_window),
      coin_y = y_name,
      coin_x = x_name,
      date_time = test[["date_time"]]
    )
  })
  averaged <- bind_rows(per_pair) %>%
    group_by(date_time) %>%
    summarise(return_strategy = mean(return_pair))
  averaged[["return_strategy"]]
}

12. Plot Single Function

Description
Create plots of a cointegration-based mean reversion trading strategy of a single coin pair comprised of coin y and coin x. There are two plots created by this function. The first plot displays the spread transformed into a z-score with three red lines at -2, 0, and 2. A green line indicates the signal, which takes values between -1 and +1 in steps of 0.25. The second plot displays the cumulative return of the model in blue. Two additional lines show the buy and hold return of coin y and coin x as red and green lines, respectively.

Arguments
train: A dataframe generated by prepare_data() that represents the training set for the coin pair.
test: A dataframe generated by prepare_data() that represents the test set for the coin pair.
coin_y: A string indicating the dependent coin in the coin pair regression.
coin_x: A string indicating the independent coin in the coin pair regression.
rolling_window: The number of observations used in each iteration of a rolling linear regression.

Value
Prints the plots described above.

# Print two diagnostic plots for one coin pair over the test set:
#   1. the z-score of the spread together with the trading signal, with
#      reference lines at -2, 0 and 2;
#   2. the cumulative return of the model against the buy-and-hold return of
#      each coin (price divided by the first test-set price).
#
# Arguments:
#   train, test:    data frames with one price column per currency pair; test
#                   must also carry a date_time column.
#   coin_y, coin_x: column names of the dependent / independent coin.
#   rolling_window: window width forwarded to train_model() / backtest_pair().
#
# Called for its side effect of printing both plots.
plot_single <- function(train, test, coin_y, coin_x, rolling_window) {
  pair_model <- train_model(train = train,
                            test = test,
                            coin_y = coin_y,
                            coin_x = coin_x,
                            rolling_window = rolling_window)
  plot_data <- test %>%
    mutate(spread = pair_model[["spread"]],
           spread_z = pair_model[["spread_z"]],
           signal = generate_signals(train = train,
                                     test = test,
                                     coin_y = coin_y,
                                     coin_x = coin_x,
                                     model = pair_model),
           return_pair = backtest_pair(train = train,
                                       test = test,
                                       coin_y = coin_y,
                                       coin_x = coin_x,
                                       rolling_window = rolling_window),
           return_buyhold_y = test[[coin_y]] / test[[coin_y]][1],
           return_buyhold_x = test[[coin_x]] / test[[coin_x]][1])

  # Plot 1: z-score of the spread and the signal derived from it.
  spread_plot <- ggplot(plot_data, aes(x = date_time)) +
    geom_line(aes(y = spread_z, colour = "Spread Z"), size = 1) +
    geom_line(aes(y = signal, colour = "Signal"), size = 0.5) +
    geom_hline(yintercept = 0, colour = "red", alpha = 0.5) +
    geom_hline(yintercept = 2, colour = "red", alpha = 0.5) +
    geom_hline(yintercept = -2, colour = "red", alpha = 0.5) +
    scale_color_manual(name = "Series",
                       values = c("Spread Z" = "blue",
                                  "Signal" = "green")) +
    labs(title = "Spread vs Trading Signal",
         subtitle = str_c(coin_y, " and ", coin_x),
         x = "Date",
         y = "Spread and Signal")
  print(spread_plot)

  # Plot 2: model return against buy-and-hold of each coin.
  return_plot <- ggplot(plot_data, aes(x = date_time)) +
    geom_line(aes(y = return_pair, colour = "Model"), size = 1) +
    geom_line(aes(y = return_buyhold_y, colour = "Coin Y"), size = 0.5, alpha = 0.4) +
    geom_line(aes(y = return_buyhold_x, colour = "Coin X"), size = 0.5, alpha = 0.4) +
    geom_hline(yintercept = 1, colour = "black") +
    scale_color_manual(name = "Return",
                       values = c("Model" = "darkblue",
                                  "Coin Y" = "darkred",
                                  "Coin X" = "darkgreen")) +
    labs(title = "Model Return vs Buy Hold Return",
         subtitle = str_c(coin_y, " and ", coin_x),
         x = "Date",
         y = "Cumulative Return")
  print(return_plot)
}

13. Plot Many Function

Description
Create many plots by calling the plot_single() function multiple times. Also creates a plot showing the results of the overall strategy using backtest_strategy(). Creates a train and test set surrounding a cutoff date and creates plots for the top n coin pairs ranked by their ADF statistic.

Arguments
pricing_data: A dataframe containing pricing data from Poloniex gathered in tidy format.
time_resolution: The number of seconds that each observation spans. Takes values 300, 900, 1800, 7200, 14400, and 86400.
cutoff_date: A date representing the cutoff date between the train and test sets.
train_window: A period object from lubridate representing the length of time the train set covers.
test_window: A period object from lubridate representing the length of time the test set covers.
quote_currency: A string indicating the quote currency of the currency pairs. Can take values USDT or BTC.
rolling_window: The number of observations used in each iteration of a rolling linear regression.
number_pairs: The number of pairs to generate plots for.

Value
Prints the plots described above.

# Run one train/test fold around a cutoff date and plot the results.
#
# The train set spans [cutoff - train_window, cutoff] and the test set spans
# [cutoff, cutoff + test_window]. Pairs are chosen with select_pairs() on the
# train set, printed, and the first `number_pairs` of them are plotted with
# plot_single(). Finally the equally weighted strategy return from
# backtest_strategy() is plotted against buy-and-hold of the USDT_BTC column.
#
# Arguments:
#   pricing_data:    tidy pricing data, passed to prepare_data().
#   time_resolution: seconds per observation, passed to prepare_data().
#   cutoff_date:     value accepted by as.Date(); boundary between train and test.
#   train_window:    length of the train set, subtracted from the cutoff date.
#   test_window:     length of the test set, added to the cutoff date.
#   quote_currency:  "USDT" or "BTC", passed to create_pairs().
#   rolling_window:  window width in observations for the rolling regression.
#   number_pairs:    maximum number of pairs to plot individually; 0 plots none.
#
# Returns the string "No coin pairs selected." when no pair passes selection;
# otherwise returns the strategy ggplot object (shown when auto-printed).
plot_many <- function(pricing_data, time_resolution, cutoff_date, train_window,
                      test_window, quote_currency, rolling_window, number_pairs) {
  cutoff <- as.Date(cutoff_date)
  train <- prepare_data(pricing_data = pricing_data,
                        time_resolution = time_resolution,
                        start_date = cutoff - train_window,
                        end_date = cutoff)
  test <- prepare_data(pricing_data = pricing_data,
                       time_resolution = time_resolution,
                       start_date = cutoff,
                       end_date = cutoff + test_window)
  coin_pairs <- create_pairs(quote_currency = quote_currency)
  selected_pairs <- select_pairs(train = train,
                                 coin_pairs = coin_pairs)
  if (nrow(selected_pairs) == 0) {
    return("No coin pairs selected.")
  }
  print(selected_pairs)
  # seq_len() over a non-negative count: `1:min(...)` would iterate c(1, 0)
  # when number_pairs is 0 and index a non-existent pair.
  pairs_to_plot <- max(0, min(number_pairs, nrow(selected_pairs)))
  for (i in seq_len(pairs_to_plot)) {
    plot_single(train = train,
                test = test,
                coin_y = selected_pairs[["coin_y"]][i],
                coin_x = selected_pairs[["coin_x"]][i],
                rolling_window = rolling_window)
  }
  test <- test %>%
    mutate(return_strategy = backtest_strategy(train = train,
                                               test = .,
                                               selected_pairs = selected_pairs,
                                               rolling_window = rolling_window))
  # The benchmark is always the USDT_BTC column, whatever quote_currency is.
  ggplot(test, aes(x = date_time)) +
    geom_line(aes(y = return_strategy, colour = "Strategy"), size = 1) +
    geom_line(aes(y = USDT_BTC / USDT_BTC[1], colour = "USDT_BTC"), size = 0.5, alpha = 0.4) +
    geom_hline(yintercept = 1, colour = "black") +
    scale_color_manual(name = "Return",
                       values = c("Strategy" = "darkblue",
                                  "USDT_BTC" = "darkred")) +
    labs(title = "Strategy Return vs Buy Hold Return",
         x = "Date",
         y = "Cumulative Return")
}

14. Set Parameters

# Data selection.
time_resolution <- 900   # seconds per observation (15-minute bars)
quote_currency <- "USDT"
number_pairs <- 3        # pairs plotted individually per fold

# Fold geometry: train on 32 days, test on the following 16 days, and step the
# cutoff forward by the same 16 days in the full cross validation.
train_window <- days(32)
test_window <- days(16)
test_by <- "16 days"

# Rolling window in observations: (observations per day) * (2 days).
rolling_window <- (86400 / time_resolution) * (as.numeric(days(2)) / 86400)

15. Cross Validation September 2017

# Fold with cutoff 2017-09-01: train on the train_window ending at the cutoff,
# test on the test_window that follows it.
plot_many(pricing_data = pricing_data,
          time_resolution = time_resolution,
          cutoff_date = "2017-09-01",
          train_window = train_window,
          test_window = test_window,
          quote_currency = quote_currency,
          rolling_window = rolling_window,
          number_pairs = number_pairs)
## # A tibble: 3 x 3
##     coin_y   coin_x  adf_stat
##      <chr>    <chr>     <dbl>
## 1 USDT_REP USDT_ZEC -5.340746
## 2 USDT_ZEC USDT_REP -5.282765
## 3 USDT_REP USDT_ETH -3.471493

16. Cross Validation August 2017

# Fold with cutoff 2017-08-01: train on the train_window ending at the cutoff,
# test on the test_window that follows it.
plot_many(pricing_data = pricing_data,
          time_resolution = time_resolution,
          cutoff_date = "2017-08-01",
          train_window = train_window,
          test_window = test_window,
          quote_currency = quote_currency,
          rolling_window = rolling_window,
          number_pairs = number_pairs)
## # A tibble: 8 x 3
##      coin_y    coin_x  adf_stat
##       <chr>     <chr>     <dbl>
## 1  USDT_ETH  USDT_ZEC -4.462054
## 2  USDT_ZEC  USDT_REP -4.361885
## 3  USDT_REP  USDT_ZEC -4.335836
## 4  USDT_ZEC  USDT_ETH -4.290393
## 5 USDT_DASH  USDT_XMR -4.016416
## 6  USDT_XMR USDT_DASH -4.000072
## 7  USDT_ETH  USDT_REP -3.695401
## 8  USDT_REP  USDT_ETH -3.464286

17. Cross Validation July 2017

# Fold with cutoff 2017-07-01: train on the train_window ending at the cutoff,
# test on the test_window that follows it.
plot_many(pricing_data = pricing_data,
          time_resolution = time_resolution,
          cutoff_date = "2017-07-01",
          train_window = train_window,
          test_window = test_window,
          quote_currency = quote_currency,
          rolling_window = rolling_window,
          number_pairs = number_pairs)
## # A tibble: 8 x 3
##      coin_y   coin_x  adf_stat
##       <chr>    <chr>     <dbl>
## 1  USDT_REP USDT_XMR -5.734962
## 2  USDT_XMR USDT_REP -5.685226
## 3  USDT_BTC USDT_REP -4.539502
## 4  USDT_REP USDT_BTC -4.485586
## 5  USDT_BTC USDT_XMR -4.190621
## 6  USDT_XMR USDT_BTC -4.063350
## 7 USDT_DASH USDT_ZEC -3.551029
## 8 USDT_DASH USDT_LTC -3.535247

18. Cross Validation June 2017

# Fold with cutoff 2017-06-01: train on the train_window ending at the cutoff,
# test on the test_window that follows it.
plot_many(pricing_data = pricing_data,
          time_resolution = time_resolution,
          cutoff_date = "2017-06-01",
          train_window = train_window,
          test_window = test_window,
          quote_currency = quote_currency,
          rolling_window = rolling_window,
          number_pairs = number_pairs)
## # A tibble: 14 x 3
##       coin_y    coin_x  adf_stat
##        <chr>     <chr>     <dbl>
##  1  USDT_REP USDT_DASH -6.302270
##  2 USDT_DASH  USDT_REP -6.164980
##  3 USDT_DASH  USDT_ZEC -4.838517
##  4  USDT_REP  USDT_ZEC -4.765335
##  5  USDT_ZEC USDT_DASH -4.419407
##  6 USDT_DASH  USDT_XMR -4.338766
##  7  USDT_REP  USDT_XMR -4.334303
##  8  USDT_XMR USDT_DASH -4.299602
##  9  USDT_ZEC  USDT_REP -4.132672
## 10  USDT_XMR  USDT_REP -4.062863
## 11  USDT_XMR  USDT_ZEC -3.974349
## 12 USDT_DASH  USDT_ETH -3.746119
## 13  USDT_XMR  USDT_ETH -3.539918
## 14  USDT_ZEC  USDT_XMR -3.518425

19. Cross Validation May 2017

# Fold with cutoff 2017-05-01: train on the train_window ending at the cutoff,
# test on the test_window that follows it.
plot_many(pricing_data = pricing_data,
          time_resolution = time_resolution,
          cutoff_date = "2017-05-01",
          train_window = train_window,
          test_window = test_window,
          quote_currency = quote_currency,
          rolling_window = rolling_window,
          number_pairs = number_pairs) 
## # A tibble: 23 x 3
##       coin_y    coin_x  adf_stat
##        <chr>     <chr>     <dbl>
##  1  USDT_LTC USDT_DASH -5.247891
##  2  USDT_LTC  USDT_BTC -5.055356
##  3  USDT_REP  USDT_ETH -4.910632
##  4 USDT_DASH  USDT_ZEC -4.609111
##  5 USDT_DASH  USDT_ETH -4.531939
##  6  USDT_LTC  USDT_ETH -4.516577
##  7  USDT_XMR  USDT_ZEC -4.484302
##  8  USDT_LTC  USDT_ZEC -4.362703
##  9 USDT_DASH  USDT_REP -4.351660
## 10  USDT_REP USDT_DASH -4.303321
## # ... with 13 more rows

# Additional mid-month fold with cutoff 2017-05-15, using the same windows.
plot_many(pricing_data = pricing_data,
          time_resolution = time_resolution,
          cutoff_date = "2017-05-15", 
          train_window = train_window,
          test_window = test_window,
          quote_currency = quote_currency,
          rolling_window = rolling_window,
          number_pairs = number_pairs)
## # A tibble: 17 x 3
##       coin_y    coin_x  adf_stat
##        <chr>     <chr>     <dbl>
##  1 USDT_DASH  USDT_ZEC -7.181604
##  2  USDT_ZEC USDT_DASH -7.104415
##  3  USDT_ZEC  USDT_ETH -5.300897
##  4  USDT_REP  USDT_ZEC -5.216806
##  5 USDT_DASH  USDT_REP -5.214211
##  6  USDT_ZEC  USDT_REP -5.169008
##  7  USDT_REP USDT_DASH -5.159208
##  8  USDT_ETH  USDT_ZEC -5.077258
##  9 USDT_DASH  USDT_ETH -4.360553
## 10  USDT_REP  USDT_ETH -4.194988
## 11  USDT_ETH USDT_DASH -3.964114
## 12  USDT_ETH  USDT_REP -3.851741
## 13  USDT_XMR  USDT_LTC -3.732961
## 14  USDT_XMR  USDT_ETH -3.729675
## 15  USDT_ZEC  USDT_XMR -3.703470
## 16  USDT_ETH  USDT_XMR -3.683674
## 17  USDT_LTC  USDT_XMR -3.629185

20. Cross Validation April 2017

# Fold with cutoff 2017-04-01: train on the train_window ending at the cutoff,
# test on the test_window that follows it.
plot_many(pricing_data = pricing_data,
          time_resolution = time_resolution,
          cutoff_date = "2017-04-01",
          train_window = train_window,
          test_window = test_window,
          quote_currency = quote_currency,
          rolling_window = rolling_window,
          number_pairs = number_pairs)
## # A tibble: 6 x 3
##      coin_y    coin_x  adf_stat
##       <chr>     <chr>     <dbl>
## 1  USDT_BTC  USDT_ZEC -3.921811
## 2  USDT_ZEC  USDT_BTC -3.894445
## 3 USDT_DASH  USDT_XMR -3.786044
## 4  USDT_XMR USDT_DASH -3.627830
## 5  USDT_XMR  USDT_ZEC -3.562814
## 6  USDT_ZEC  USDT_XMR -3.508866

21. Cross Validation March 2017

# Fold with cutoff 2017-03-01: train on the train_window ending at the cutoff,
# test on the test_window that follows it.
plot_many(pricing_data = pricing_data,
          time_resolution = time_resolution,
          cutoff_date = "2017-03-01",
          train_window = train_window,
          test_window = test_window,
          quote_currency = quote_currency,
          rolling_window = rolling_window,
          number_pairs = number_pairs) 
## # A tibble: 12 x 3
##      coin_y    coin_x  adf_stat
##       <chr>     <chr>     <dbl>
##  1 USDT_LTC  USDT_ZEC -4.204591
##  2 USDT_XMR  USDT_BTC -4.108816
##  3 USDT_XMR  USDT_LTC -4.039288
##  4 USDT_LTC  USDT_REP -3.989227
##  5 USDT_XMR USDT_DASH -3.973108
##  6 USDT_LTC  USDT_XMR -3.948913
##  7 USDT_XMR  USDT_ETH -3.862021
##  8 USDT_LTC  USDT_ETH -3.852363
##  9 USDT_XMR  USDT_ZEC -3.813835
## 10 USDT_LTC USDT_DASH -3.712336
## 11 USDT_XMR  USDT_REP -3.697045
## 12 USDT_LTC  USDT_BTC -3.596645

# Additional mid-month fold with cutoff 2017-03-15, using the same windows.
plot_many(pricing_data = pricing_data,
          time_resolution = time_resolution,
          cutoff_date = "2017-03-15",
          train_window = train_window,
          test_window = test_window,
          quote_currency = quote_currency,
          rolling_window = rolling_window,
          number_pairs = number_pairs)
## # A tibble: 14 x 3
##      coin_y    coin_x  adf_stat
##       <chr>     <chr>     <dbl>
##  1 USDT_LTC  USDT_REP -5.797087
##  2 USDT_LTC  USDT_ETH -5.559206
##  3 USDT_LTC  USDT_XMR -5.547670
##  4 USDT_LTC  USDT_BTC -5.212817
##  5 USDT_LTC USDT_DASH -4.969545
##  6 USDT_LTC  USDT_ZEC -4.934673
##  7 USDT_XMR  USDT_REP -4.924900
##  8 USDT_REP  USDT_XMR -4.899259
##  9 USDT_REP  USDT_LTC -4.221165
## 10 USDT_BTC  USDT_LTC -3.955425
## 11 USDT_XMR  USDT_LTC -3.922512
## 12 USDT_ETH  USDT_LTC -3.824949
## 13 USDT_REP  USDT_ETH -3.620907
## 14 USDT_ETH  USDT_REP -3.534860

22. Cross Validation February 2017

# Fold with cutoff 2017-02-01: train on the train_window ending at the cutoff,
# test on the test_window that follows it.
plot_many(pricing_data = pricing_data,
          time_resolution = time_resolution,
          cutoff_date = "2017-02-01",
          train_window = train_window,
          test_window = test_window,
          quote_currency = quote_currency,
          rolling_window = rolling_window,
          number_pairs = number_pairs)
## # A tibble: 7 x 3
##     coin_y    coin_x  adf_stat
##      <chr>     <chr>     <dbl>
## 1 USDT_REP  USDT_ETH -4.735592
## 2 USDT_REP USDT_DASH -4.488778
## 3 USDT_REP  USDT_BTC -4.291817
## 4 USDT_REP  USDT_XMR -4.282674
## 5 USDT_REP  USDT_ZEC -4.263626
## 6 USDT_REP  USDT_LTC -4.182799
## 7 USDT_LTC  USDT_XMR -3.660178

23. Cross Validation January 2017

# Fold with cutoff 2017-01-01: train on the train_window ending at the cutoff,
# test on the test_window that follows it.
plot_many(pricing_data = pricing_data,
          time_resolution = time_resolution,
          cutoff_date = "2017-01-01",
          train_window = train_window,
          test_window = test_window,
          quote_currency = quote_currency,
          rolling_window = rolling_window,
          number_pairs = number_pairs)
## # A tibble: 10 x 3
##      coin_y    coin_x  adf_stat
##       <chr>     <chr>     <dbl>
##  1 USDT_REP  USDT_ETH -5.179556
##  2 USDT_REP  USDT_XMR -4.969760
##  3 USDT_REP  USDT_ZEC -4.911000
##  4 USDT_REP  USDT_LTC -4.743256
##  5 USDT_REP  USDT_BTC -4.470370
##  6 USDT_REP USDT_DASH -4.430406
##  7 USDT_ETH  USDT_REP -3.687047
##  8 USDT_LTC  USDT_XMR -3.637524
##  9 USDT_BTC  USDT_XMR -3.571980
## 10 USDT_XMR  USDT_BTC -3.476249

24. Cross Validation Full

# Walk-forward cross validation: step the cutoff date forward by `test_by`,
# re-select pairs on each train window and backtest them on the test window
# that follows. The per-fold test sets (with the strategy return attached) are
# stacked into `results`.
cutoff_dates <- seq(ymd("2017-01-01"), ymd("2017-10-01"), by = test_by)
fold_results <- vector("list", length(cutoff_dates))
for (fold in seq_along(cutoff_dates)) {
  # Index into the vector rather than iterating over it: `for (i in dates)`
  # drops the Date class and would need as.Date(<numeric>) with no origin.
  i <- cutoff_dates[fold]
  print(str_c("Cross validating strategy."))
  print(str_c("Using train set from ", i - train_window , " to ", i, "."))
  print(str_c("Using test set from ", i, " to ", i + test_window, "."))
  train <- prepare_data(pricing_data = pricing_data,
                        time_resolution = time_resolution,
                        start_date = i - train_window,
                        end_date = i)
  test <- prepare_data(pricing_data = pricing_data,
                       time_resolution = time_resolution,
                       start_date = i,
                       end_date = i + test_window)
  coin_pairs <- create_pairs(quote_currency = quote_currency)
  selected_pairs <- select_pairs(train = train, coin_pairs = coin_pairs)
  test <- test %>%
    mutate(return_strategy = backtest_strategy(train = train,
                                               test = test,
                                               selected_pairs = selected_pairs,
                                               rolling_window = rolling_window),
           # Period-over-period change, so folds can be chained with cumprod().
           return_strategy_change = return_strategy / lag(return_strategy, 1) - 1) %>%
    # NA -> 0 in every column (this also turns date_time into plain numbers).
    mutate_all(funs(ifelse(is.na(.), 0, .)))
  fold_results[[fold]] <- test
}
# Stack all folds once instead of growing `results` inside the loop.
results <- bind_rows(fold_results)
## [1] "Cross validating strategy."
## [1] "Using train set from 2016-11-30 to 2017-01-01."
## [1] "Using test set from 2017-01-01 to 2017-01-17."
## [1] "Cross validating strategy."
## [1] "Using train set from 2016-12-16 to 2017-01-17."
## [1] "Using test set from 2017-01-17 to 2017-02-02."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-01-01 to 2017-02-02."
## [1] "Using test set from 2017-02-02 to 2017-02-18."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-01-17 to 2017-02-18."
## [1] "Using test set from 2017-02-18 to 2017-03-06."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-02-02 to 2017-03-06."
## [1] "Using test set from 2017-03-06 to 2017-03-22."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-02-18 to 2017-03-22."
## [1] "Using test set from 2017-03-22 to 2017-04-07."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-03-06 to 2017-04-07."
## [1] "Using test set from 2017-04-07 to 2017-04-23."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-03-22 to 2017-04-23."
## [1] "Using test set from 2017-04-23 to 2017-05-09."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-04-07 to 2017-05-09."
## [1] "Using test set from 2017-05-09 to 2017-05-25."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-04-23 to 2017-05-25."
## [1] "Using test set from 2017-05-25 to 2017-06-10."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-05-09 to 2017-06-10."
## [1] "Using test set from 2017-06-10 to 2017-06-26."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-05-25 to 2017-06-26."
## [1] "Using test set from 2017-06-26 to 2017-07-12."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-06-10 to 2017-07-12."
## [1] "Using test set from 2017-07-12 to 2017-07-28."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-06-26 to 2017-07-28."
## [1] "Using test set from 2017-07-28 to 2017-08-13."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-07-12 to 2017-08-13."
## [1] "Using test set from 2017-08-13 to 2017-08-29."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-07-28 to 2017-08-29."
## [1] "Using test set from 2017-08-29 to 2017-09-14."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-08-13 to 2017-09-14."
## [1] "Using test set from 2017-09-14 to 2017-09-30."
## [1] "Cross validating strategy."
## [1] "Using train set from 2017-08-29 to 2017-09-30."
## [1] "Using test set from 2017-09-30 to 2017-10-16."
# Chain the per-period strategy returns of all folds into one cumulative
# return series, and turn date_time (stored as numbers at this point) back
# into date-times for plotting.
results <- mutate(
  results,
  return_strategy_cumulative = cumprod(1 + return_strategy_change),
  date_time = as.POSIXct(date_time, origin = "1970-01-01")
)

# Cumulative strategy return across all folds.
ggplot(data = results, mapping = aes(x = date_time)) +
  geom_line(aes(y = return_strategy_cumulative), colour = "blue", size = 1) +
  geom_hline(yintercept = 1, colour = "black") +
  labs(title = "Strategy Return vs Buy Hold Return", x = "Date", y = "Cumulative Return")

# Final cumulative return of the strategy.
print(tail(results[["return_strategy_cumulative"]], 1))
## [1] 1.713087